/* Copyright 2012 Tobias Marschall
 * 
 * This file is part of CLEVER.
 * 
 * CLEVER is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CLEVER is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CLEVER.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>
#include <fstream>
#include <vector>
#include <cassert>

#include <boost/program_options.hpp>
#include <boost/unordered_set.hpp>
#include <boost/tokenizer.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/dynamic_bitset.hpp>

#include "Variation.h"
#include "VariationUtils.h"
#include "VariationListParser.h"
#include "LengthAwareVariationIndex.h"
#include "VersionInfo.h"

using namespace std;
using namespace boost;
namespace po = boost::program_options;

void usage(const char* name, const po::options_description& options_desc) {
	cerr << "Usage: " << name << " [options] <variants-file>" << endl;
	cerr << endl;
	cerr << "Reads a list of variations (e.g. as computed by laser-core), reads CLEVER output from stdin" << endl;
	cerr << "and outputs those variations that are similar to CLEVER predictions." << endl;
	cerr << endl;
	cerr << options_desc << endl;
	exit(1);
}


int main(int argc, char* argv[]) {
	VersionInfo::checkAndPrintVersion("filter-variations", cerr);
	string commandline = VersionInfo::commandline(argc, argv);

	// PARAMETERS
	int max_distance;
	int max_length_diff;
	int min_length;
	
	po::options_description options_desc("Allowed options");
	options_desc.add_options()
		("max_offset,o", po::value<int>(&max_distance)->default_value(100), "Maximum allowed distance.")
		("max_length_diff,z", po::value<int>(&max_length_diff)->default_value(20), "Maximum allowed length difference.")
		("min_length,l", po::value<int>(&min_length)->default_value(10), "Minimum length of variations to be written.")
	;

	if (argc<2) {
		usage(argv[0], options_desc);
	}
	string variants_filename(argv[argc-1]);
	argc -= 1;

	po::variables_map options;
	try {
		po::store(po::parse_command_line(argc, argv, options_desc), options);
		po::notify(options);
	} catch(std::exception& e) {
		cerr << "error: " << e.what() << "\n";
		return 1;
	}
	cerr << "Commandline: " << commandline << endl;

	ifstream variants_stream(variants_filename.c_str());
	if (variants_stream.fail()) {
		cerr << "Error: could not open \"" << variants_filename << "\"." << endl;
		return 1;
	}
	auto_ptr<vector<Variation> > variations = VariationListParser::parse(variants_stream, false, min_length);
	cerr << "Read " << variations->size() << " variations." << endl;

	LengthAwareVariationIndex index(*variations);
	boost::dynamic_bitset<> selected_variants(variations->size());
	
	auto_ptr<vector<Variation> > result(new vector<Variation>());
	typedef boost::tokenizer<boost::char_separator<char> > tokenizer_t;
	boost::char_separator<char> whitespace_separator(" \t");
	string line;
	int linenr = 0;
	while (getline(cin,line)) {
		linenr += 1;
		tokenizer_t tokenizer(line, whitespace_separator);
		vector<string> tokens(tokenizer.begin(), tokenizer.end());
		if (tokens.size() < 4) {
			ostringstream oss;
			oss << "Error parsing variation list. Offending line: " << linenr;
			throw std::runtime_error(oss.str());
		}
		try {
			if (tokens[3].compare("DEL") == 0) {
				cerr << "CLEVER deletion: " << line << endl;
				const string& chromosome = tokens[0];
				int start = boost::lexical_cast<int>(tokens[1]) - 1;
				int end = boost::lexical_cast<int>(tokens[2]);
				int length1 = end - start;
				double center1 = ((double)(start + end)) / 2.0;
				int windowsize = max_distance + (length1 + max_length_diff) / 2 + 1;
				auto_ptr<vector<size_t> > candidates = index.containedIn(chromosome, start-windowsize, end+windowsize, max(length1-max_length_diff, min_length), length1+max_length_diff);
				if (candidates.get() == 0) continue;
				for (size_t i=0; i<candidates->size(); ++i) {
					assert(candidates->at(i) < variations->size());
					const Variation& v = variations->at(candidates->at(i));
					if (v.getType() != Variation::DELETION) continue;
					int length2 = v.getCoordinate2() - v.getCoordinate1();
					if (abs(length1-length2) > max_length_diff) continue;
					int center2 = ((double)(v.getCoordinate1() + v.getCoordinate2())) / 2.0;
					if (abs(center1-center2) > max_distance) continue;
					cerr << "   --> " << v << endl;
					selected_variants.set(candidates->at(i), true);
				}
			} else if (tokens[3].compare("INS") == 0) {
				cerr << "CLEVER insertion: " << line << endl;
				const string& chromosome = tokens[0];
				int pos1 = boost::lexical_cast<int>(tokens[1]) - 1;
				int length1 = boost::lexical_cast<int>(tokens[2]);
				int windowsize = max_distance + 1;
				auto_ptr<vector<size_t> > candidates = index.containedIn(chromosome, pos1-windowsize, pos1+windowsize, -length1-max_length_diff, -max(length1-max_length_diff, min_length));
				if (candidates.get() == 0) continue;
				for (size_t i=0; i<candidates->size(); ++i) {
					assert(candidates->at(i) < variations->size());
					const Variation& v = variations->at(candidates->at(i));
					if (v.getType() != Variation::INSERTION) continue;
					int length2 = v.getCoordinate2();
					if (abs(length1-length2) > max_length_diff) continue;
					int pos2 = v.getCoordinate1();
					if (abs(pos1-pos2) > max_distance) continue;
					cerr << "   --> " << v << endl;
					selected_variants.set(candidates->at(i), true);
				}
			} else {
				continue;
			}
		} catch(boost::bad_lexical_cast &){
			ostringstream oss;
			oss << "Error parsing variation list. Offending line: " << linenr;
			throw std::runtime_error(oss.str());
		}
	}

	for (size_t i=0; i<variations->size(); ++i) {
		if (selected_variants[i]) {
			cout << variations->at(i) << endl;
		}
	}
	
	return 0;
}
